# Announce the start of this script, stamped with the value of f_time_gmt()
sprintf("\nScript 1: Wrangle H&R data (%s)\n", f_time_gmt()) |> cat()

# Load data --------------------------------------------------------------------

# H&R data in the state right before analyses
# (up to script 02_CovariateAnalysis.R, l. 357)
hr_l_raw <- here("data", "hr", "ag_long.rds") |> readRDS()



# Wrangle data -----------------------------------------------------------------

# Build the working table from the raw data in a single `[i, j]` call:
# `i` keeps only the rows of interest, `j` builds the renamed columns.
# `.()` in `j` always allocates a new data.table, so hr_l_raw is left
# untouched without an explicit (and costly) deep copy(), and no full-size
# intermediate table is created before filtering.
hr_l <- hr_l_raw[
  # only keep relevant questions (question label contains "allcGentz")
  str_detect(question, "allcGentz"),
  # only keep variables of interest, under their analysis names
  .(r_id = lucid_rid, q = question, treatment = allcGentz, newdv,
    education_num = education, party)
]

# Question label: drop the first "recode.allcGentz." found in each value
hr_l[, q := str_remove(q, "recode\\.allcGentz\\.")]

# Treatment: integer dummy, 1 for "Pos keyed" and 0 for any other value
# (a missing treatment stays missing)
hr_l[, treatment := as.integer(treatment == "Pos keyed")]

# Education: negative codes (such as -3105) are recoded to missing
hr_l[which(education_num < 0), education_num := NA_real_]

# Education labels: convert the numeric code to text, then rename each code
# to its label (new label on the left, existing code on the right)
hr_l[, education := as.character(education_num) |>
  fct_recode(
    "Some high school or less" = "1",
    "High school graduate" = "2",
    "Other post high school vocational training" = "3",
    "Completed some college, but no degree" = "4",
    "Associate's degree" = "5",
    "Bachelor's degree" = "6",
    "Master's or professional degree" = "7",
    "Doctorate degree" = "8"
  )]

# ed4: collapse the eight education codes into four ordered groups
# (1-2, 3-4, 5-6, 7-8); any other value, including missing, becomes NA
hr_l[, ed4 := {
  # Labels are defined once so the fcase() values and the factor levels
  # cannot drift apart
  ed4_levels <- c("HS or less", "Some college", "College", "Postgraduate")
  ed4_label <- fcase(
    education_num %in% 1:2, ed4_levels[1],
    education_num %in% 3:4, ed4_levels[2],
    education_num %in% 5:6, ed4_levels[3],
    education_num %in% 7:8, ed4_levels[4]
  )
  factor(ed4_label, levels = ed4_levels)
}]

# Log the save step with a timestamp, then write the wrangled table to
# proc/hr.rds as an intermediate dataset
sprintf("--Save proc/hr.rds (%s)\n", f_time_gmt()) |> cat()
saveRDS(hr_l, file = here("proc", "hr.rds"))
